#!/usr/bin/env python
#encoding=utf-8

import HTMLParser
import urllib2
import cookielib
import os
import sys
import socket
import re
import httplib2
import torrent
from BeautifulSoup import BeautifulSoup
from urllib import urlencode

import config

# Cookie string for the tracker session, taken from the local config module.
Cookie = config.NEXUSCOOKIE

# Header values sent with every request made through a NexusPHP opener.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
_ACCEPT = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'

header = [
    ('User-Agent', _USER_AGENT),
    ('Accept', _ACCEPT),
    ('Cookie', Cookie),
]

# Shared httplib2 client used for the config.MY_DATAQUERY_URL requests.
h = httplib2.Http()

def printf(str):
    print str.decode('UTF-8').encode('GBK')

class NexusPHP(object):
    """Scraper that downloads new torrents from a tracker listing.

    The listing is read from ``config.NEXUS_PAGE_URL`` one page at a time
    (``self.page`` is the current page number).  For every torrent found,
    the service at ``config.MY_DATAQUERY_URL`` is queried and the torrent
    is downloaded only when the ``is_get_torrent`` query answers ``'0'``
    (presumably meaning "not fetched yet" -- confirm against that service).

    NOTE(review): the download URL is hard-coded to chdbits.org while the
    listing URL comes from config -- confirm both point at the same site.
    """

    # Download endpoint; the torrent id is appended to it.
    DOWNLOAD_URL = 'http://chdbits.org/download.php?id='
    # Timeout in seconds applied to every request made through the opener.
    TIMEOUT = 30

    # The listing is parsed with regular expressions; an earlier
    # BeautifulSoup-based attempt reportedly garbled Chinese text.
    _ROW_RE = re.compile(
        r'<tr><td class="rowfollow nowrap category-icon">(.*?)</tr>',
        re.DOTALL)
    _ID_RE = re.compile(r'details.php\?id=(\d+)&amp;hit=1"')
    _SEEDERS_RE = re.compile(r'seeders">(\d+)<')
    _LEECHERS_RE = re.compile(r'leechers">(\d+)<')
    _CELL_RE = re.compile(r'<td class="rowfollow">(.*?)</td>')
    _TITLE_RE = re.compile(r'hit=1">(.*?)</a>')
    _SUBTITLE_RE = re.compile(r'h3 title="([^"]+)"')

    # Multipliers converting a size to GiB, keyed by the unit's first letter.
    _UNIT_TO_GB = {'K': 1.0 / (1024 * 1024), 'M': 1.0 / 1024, 'G': 1.0}

    def __init__(self):
        """Build a cookie-aware opener that sends the module-level headers."""
        self.cookies = cookielib.CookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookies))
        self.opener.addheaders = header
        self.page = 0
        self.failed_down_torrents = []

    def downloadTorrent(self, id):
        """Download one ``.torrent`` file into ``config.TORRENT_DIR``.

        The file is written to ``config.TORRENT_DIR + id + '.torrent'``.

        :param id: torrent id as a string (name kept for compatibility
            even though it shadows the builtin).
        :returns: 0 on success, 1 if the download or the write failed.
        """
        get_url = self.DOWNLOAD_URL + id
        try:
            data = self.opener.open(get_url, timeout=self.TIMEOUT).read()
            # The file is only created once the download has succeeded,
            # and the context manager closes it even if the write fails.
            with open(config.TORRENT_DIR + id + '.torrent', 'wb') as out:
                out.write(data)
        except Exception:
            # Best effort: report and carry on, but let KeyboardInterrupt
            # and SystemExit propagate.
            printf("get .torrent failed id=" + id)
            return 1
        return 0

    @classmethod
    def _size_in_gb(cls, cell):
        """Convert a size cell to GiB rounded to 2 decimals, or None.

        The number is the text before the first ``<`` and the unit letter
        is the third character from the end (presumably the cell looks
        like ``'1.50<br />GiB'`` -- confirm against the site markup).
        Unit letters other than K, M and G are treated as TiB.
        """
        try:
            number = float(cell[:cell.find('<')])
            unit = cell[-3]
        except (ValueError, IndexError):
            return None
        return round(number * cls._UNIT_TO_GB.get(unit, 1024.0), 2)

    @classmethod
    def _parse_rows(cls, html):
        """Extract the torrent rows from a listing page.

        :param html: source of the listing page.
        :returns: list of dicts with the keys ``id``, ``title``,
            ``subtitle`` (strings), ``seeders``, ``leechers`` (ints, 0
            when absent) and ``size_gb`` (float or None).  Rows without a
            torrent id are skipped.
        """
        def first(found, default):
            return found[0] if found else default

        rows = []
        for block in cls._ROW_RE.findall(html):
            torrent_id = first(cls._ID_RE.findall(block), None)
            if torrent_id is None:
                # Not a torrent row (or unknown markup): nothing to fetch.
                continue
            cells = cls._CELL_RE.findall(block)
            rows.append({
                'id': torrent_id,
                'title': first(cls._TITLE_RE.findall(block), ''),
                'subtitle': first(cls._SUBTITLE_RE.findall(block), ''),
                'seeders': int(first(cls._SEEDERS_RE.findall(block), 0)),
                'leechers': int(first(cls._LEECHERS_RE.findall(block), 0)),
                # The size is read from the second plain "rowfollow" cell.
                'size_gb': (cls._size_in_gb(cells[1])
                            if len(cells) > 1 else None),
            })
        return rows

    def getTorrent(self, maxcount):
        """Download up to *maxcount* new torrents from the current page.

        ``self.page`` is advanced only when the last row of the page has
        been processed; if the loop stops early because *maxcount* is
        exhausted, the same page is read again on the next call.

        :param maxcount: maximum number of torrents to download.
        :returns: list of the torrent ids downloaded by this call.
        """
        # The listing is fetched through the cookie-carrying opener.
        listing_url = config.NEXUS_PAGE_URL + '&page=' + str(self.page)
        html = self.opener.open(listing_url, timeout=self.TIMEOUT).read()
        rows = self._parse_rows(html)

        id_list = []
        last_index = len(rows) - 1
        for index, row in enumerate(rows):
            torrent_id = row['id']
            title = row['title']
            query_url = (config.MY_DATAQUERY_URL
                         + '&type=is_get_torrent&ori_torrent_id='
                         + torrent_id)
            _, answer = h.request(query_url)
            if answer == '0':
                if maxcount <= 0:
                    break
                printf('开始下载种子 title=%s %s.torrent === page=%d idx=%d'
                       % (title, torrent_id, self.page, index))
                if self.downloadTorrent(torrent_id) == 0:
                    printf('success!')
                    # Rewrite the announce URL to the private tracker.
                    # "== True" is kept so the config flag keeps its
                    # original meaning.
                    if config.USE_MY_TRACKER == True:
                        torrent.announce_change(torrent_id)
                    # Record the torrent in the database.
                    record_url = (config.MY_DATAQUERY_URL
                                  + '&type=get_torrent&ori_torrent_id='
                                  + torrent_id + '&'
                                  + urlencode({'nexus_title': title}))
                    h.request(record_url)
                    id_list.append(torrent_id)
                    maxcount -= 1
            if index == last_index:
                self.page += 1

        return id_list
        #Read the HTML from a file
        #ret = open('data.txt').read()
        #reg = re.compile('details.php\?id=(.*?)&')
        #ids = re.findall(reg, ret)
        #index = 0
        #print ret